/* OPTIONS OBS=500 NOREPLACE;  */

options mprint mlogic symbolgen;


/** This program, estimate_tfp_dispersion_by_year.sas, 
    estimates within-industry within-year TFP dispersion
    for 2002 and 2007.
***/

%include "ASMimplibs.sas";


/* Construct variables for production function estimation. */

%MACRO construct_pf_vars(data=,yr=);

/* Adds the logged output and input variables used by the production-function
   regressions to WORK.concrete_&data.&yr, rewriting that dataset in place.
     data = sample tag forming the middle of the dataset name
     yr   = two-digit year suffix of the dataset name
   Each log is assigned only when its argument is strictly positive; on other
   rows the logged variable is left unassigned by this step.
   NOTE(review): ww, ph and sw look like production-worker wages,
   production-worker hours and total salaries and wages -- confirm against
   the data dictionary. */

data concrete_&data.&yr;
  set concrete_&data.&yr;

  /* Output: log of tvs. */
  if tvs > 0 then lnq = log(tvs);

  /* Labor: implied hourly wage (ww per ph), then sw expressed in hours at
     that wage.  Both start out missing and are filled in only when the
     denominator is strictly positive. */
  pw_wage = .;
  if ph > 0 then pw_wage = ww/ph;

  pwhours = .;
  if pw_wage > 0 then pwhours = sw/pw_wage;

  if pwhours > 0 then lth = log(pwhours);

  /* Energy. */
  if energy > 0 then le = log(energy);

  /* Materials: cm net of energy; log of (nominal) intermediate inputs
     (minus energy). */
  mat = cm - energy;
  if mat > 0 then lm = log(mat);

  /* Capital: log of nominal book value of assets, beginning of year. */
  if tab > 0 then lk = log(tab);
run;

%MEND;


/* Estimate production functions on a single dataset. */

%MACRO estimate_pf_single(data=,year=);

/* Fits the production function on one sample and builds cost-share residuals.
     data = sample tag in the dataset name
     year = two-digit year; 20 is prefixed to match the cost-share file
   Reads  WORK.concrete_&data.&year and indcs.cost_shares_concrete.
   Writes WORK.concrete_resids_&data.&year (OLS residual uhat, fitted value
          lnQhat, cost-share residual cs_resid) and WORK.concrete_cs_&year. */

/* OLS of log output on the four log inputs, with a test that the
   coefficients sum to one. */
proc reg data=concrete_&data.&year;
  title1 "Production function regression on &data data";
  title2 "20&year CMF non-AR tabulated concrete mfg plants";
  QKLEM: model lnQ = lK lth lE lM;
  CRS:   test lK+lth+lE+lM=1;
  output out=concrete_resids_&data.&year
         predicted=lnQhat
         residual=uhat;
run;

/* Pull the requested year out of the industry cost-share file. */
data concrete_cs_&year;
  set indcs.cost_shares_concrete;
  if year ne 20&year then delete;
run;

/* Attach the cost shares to every plant record: they are read once, on the
   first iteration, and carried forward automatically because variables read
   with SET are retained.  The shares then stand in for the output
   elasticities when forming the residual. */
data concrete_resids_&data.&year;
  if _N_ = 1 then set concrete_cs_&year;
  set concrete_resids_&data.&year;
  cs_resid = lnq - (  (iake + iaks)*lK
                    + ial*lth
                    + iae*lE
                    + iam*lM );
run;

%MEND;



%MACRO estimate_TFP_on_single(data=,year=,var=);

/* Computes industry-wide dispersion statistics of the log-TFP measure named
   in var for a single (not multiply imputed) sample.
     data = sample tag in the dataset name
     year = two-digit year
     var  = name of the log-TFP variable (the program passes cs_resid)
   Reads  WORK.concrete_resids_&data.&year.
   Writes CONCRETE.concrete_resids_&data.&year  (plants with non-missing TFP),
          WORK.concrete_resids_&data.&year      (same plants plus a per-CEA count),
          WORK.resid_counts_&data.&year         (the per-CEA counts),
          WORK.concrete_resids_&data.&year._iqrs (dispersion measures),
          and prints the one-row summary. */

proc sort data=concrete_resids_&data.&year;
 by BEA_CEA_Code;
run;


/* Keep only the plants for which we can compute the TFP residual. */
/* Set a dummy variable for counting. */
/* The same pass also saves the permanent copy of the residuals. */
data concrete_resids_&data.&year
     concrete.concrete_resids_&data.&year;
 set concrete_resids_&data.&year;
 tfp_f=1;
 if &var ne . ;
run;


/** Create a count of plants with TFP within each CEA, 
    and merge the counts back with the residuals. 
**/
proc freq data=concrete_resids_&data.&year;
by BEA_CEA_Code;
 tables tfp_f / out=resid_counts_&data.&year (keep = BEA_CEA_Code count) NOPRINT;
run;

data concrete_resids_&data.&year (drop = tfp_f);
 merge concrete_resids_&data.&year resid_counts_&data.&year;
 by BEA_CEA_Code;
run;

/* Compute TFP dispersion statistics for the entire ready-mix concrete industry. */


/* proc contents data=concrete_resids_&data.&year; run; */

proc means data=concrete_resids_&data.&year N p10 q1 q3 p90 stddev NOPRINT;
 var &var;
 output out=concrete_resids_&data.&year._iqrs (keep = N_&var ltfp_p10_&var ltfp_q1_&var ltfp_q3_&var ltfp_p90_&var ltfp_sd_&var) N=N_&var p10=ltfp_p10_&var q1=ltfp_q1_&var q3 = ltfp_q3_&var p90=ltfp_p90_&var stddev=ltfp_sd_&var;
run;

/**** PUT DISCLOSURE ANALYSIS HERE ******/



/* Convert the log-point percentile gaps into ratios in levels. */
data concrete_resids_&data.&year._iqrs;
 set concrete_resids_&data.&year._iqrs;
 ltfp_iqr_&var = ltfp_q3_&var - ltfp_q1_&var;
 ltfp_90_10_&var = ltfp_p90_&var - ltfp_p10_&var;
 tfp_75_25_ratio_&var = exp(ltfp_iqr_&var);
 tfp_90_10_ratio_&var = exp(ltfp_90_10_&var);
run;

/* year is two digits, so the title needs the 20 prefix to show the full
   year, as the other titles in this program do. */
proc print data=concrete_resids_&data.&year._iqrs;
 var N_&var ltfp_sd_&var ltfp_iqr_&var ltfp_90_10_&var tfp_75_25_ratio_&var tfp_90_10_ratio_&var;
title1 "Measures of TFP dispersion, &data data, TFP=&var";
 title2 "20&year CMF non-AR tabulated concrete mfg plants";
run;



%MEND;





/* Estimate production functions using the CART-completed samples.
   Combine the parameter estimates from the multiple implicates and compute overall standard errors using
   Rubin's combining formulas. */

%MACRO estimate_pf_on_CARTdata(year=,data=);

/* Prepares a multiply-imputed sample for the dispersion analysis.
     year = two-digit year
     data = sample tag in the dataset name (the program passes CART and pred)
   Reads  WORK.concrete_&data.&year and WORK.concrete_cs_&year.  The latter is
          created by estimate_pf_single, so that macro must already have been
          run for the same year.
   Writes WORK.resids_&data.&year with the cost-share residual cs_resid, and
          renames _IMPUTE_ to _IMPUTATION_ in WORK.concrete_&data.&year.
   The rename is done in place, so calling this macro twice on the same
   dataset fails at the rename because _IMPUTE_ no longer exists. */

proc datasets library =work;
modify concrete_&data.&year;
rename _IMPUTE_ = _IMPUTATION_;
RUN;
quit;   /* PROC DATASETS is interactive; end it explicitly */

proc sort data=concrete_&data.&year;
 by _IMPUTATION_;
run;

/***

proc reg data=concrete_&data.&year outest=outreg_concrete&year covout noprint;
QKLEM: model lnQ = lK lth lE lM; 
CRS: test lK+lth+lE+lM=1;
 output out=concrete_resids_&data.&year
       residual=uhat
       predicted=lnQhat;
by  _IMPUTATION_;
run;

proc mianalyze data=outreg_concrete&year;
 modeleffects lK lth LE LM ;
title1 "Production function regressions, CART-completed data";
title2 "20&year concrete manufacturing";
run;

****/
/* Merge industry average cost shares with plant-level residuals data 
   and compute residuals using industry cost-shares for output elasticities.*/

/* The materials term is weighted by its cost share iam, exactly as in
   estimate_pf_single; without the weight the materials elasticity would be
   implicitly one. */
data resids_&data.&year;
 if _N_ = 1 then set concrete_cs_&year;
/* set concrete_resids_&data.&year; */
 set concrete_&data.&year;
 cs_resid =  lnQ - ((iake + iaks)*lK + ial*lth + iae*lE + iam*lM);
run; 


%MEND;


/* Estimate TFP dispersion using the CART-completed samples. **/

%MACRO estimate_TFP_on_CARTdata(year=,data=,var=);

/* Dispersion of the log-TFP measure named in var, computed separately within
   each implicate of a multiply-imputed sample and then summarised across
   implicates.
     year = two-digit year
     data = sample tag in the dataset name
     var  = name of the log-TFP variable (the program passes cs_resid)
   Reads  WORK.resids_&data.&year.
   Writes WORK.resids_&data.&year._iqrs and
          CONCRETE.concrete_resids_&data.&year._iqrs (one row per implicate),
          and prints the mean and standard deviation of each measure across
          implicates. */

proc sort data=resids_&data.&year;
  by _IMPUTATION_;
run;

/* Flag every record with the counting dummy and retain only the plants
   whose TFP residual could be computed. */
data resids_&data.&year;
  set resids_&data.&year;
  tfp_f = 1;
  if &var = . then delete;
run;

/* Percentiles and standard deviation of TFP for the whole ready-mix
   concrete industry, one row per implicate. */
proc means data=resids_&data.&year p10 q1 q3 p90 stddev noprint;
  by _IMPUTATION_;
  var &var;
  output out=resids_&data.&year._iqrs
             (keep = _IMPUTATION_
                     ltfp_p10_&var ltfp_q1_&var ltfp_q3_&var
                     ltfp_p90_&var ltfp_sd_&var)
         p10    = ltfp_p10_&var
         q1     = ltfp_q1_&var
         q3     = ltfp_q3_&var
         p90    = ltfp_p90_&var
         stddev = ltfp_sd_&var;
run;

/* Log-point gaps between percentiles and the matching ratios in levels;
   saved permanently. */
data concrete.concrete_resids_&data.&year._iqrs;
  set resids_&data.&year._iqrs;
  ltfp_iqr_&var        = ltfp_q3_&var  - ltfp_q1_&var;
  ltfp_90_10_&var      = ltfp_p90_&var - ltfp_p10_&var;
  tfp_75_25_ratio_&var = exp(ltfp_iqr_&var);
  tfp_90_10_ratio_&var = exp(ltfp_90_10_&var);
run;

ods listing;

/* Average each measure over implicates; the standard deviation reported
   here is the between-implicate spread.
   NOTE(review): the title hard-codes 500 implicates -- confirm that this
   matches the input data. */
proc means data=concrete.concrete_resids_&data.&year._iqrs mean stddev;
  title1 "Measures of TFP dispersion, 20&year &data data";
  title2 "500-implicate means and between-imputation std. devs., concrete manufacturing";
  var ltfp_sd_&var
      ltfp_iqr_&var
      ltfp_90_10_&var
      tfp_75_25_ratio_&var
      tfp_90_10_ratio_&var;
run;


%MEND;




/*********************/
/**  BEGIN PROGRAM ***/
/*********************/


/* ---- 2002 samples ---------------------------------------------------- */

/* Sample that includes the CB imputes: energy is the sum of cf and ee
   (SUM ignores a missing component) and tab is taken from tab02. */
data concrete_CB02;
  set concrete.concrete_0207_w_cb_imputes;
  if year ne 2002 then delete;
  energy = sum(cf,ee);
  tab = tab02;
run;

%construct_pf_vars(data=CB,yr=02);

/* "Good data" sample.
   NOTE(review): energy is not derived here, so it is presumably already
   on concrete_0207_gooddata -- confirm. */
data concrete_good02;
  set concrete.concrete_0207_gooddata;
  if year ne 2002 then delete;
  tab = tab02;
run;

%construct_pf_vars(data=good,yr=02);


/* ---- 2007 samples ---------------------------------------------------- */

data concrete_CB07;
  set concrete.concrete_0207_w_cb_imputes;
  if year ne 2007 then delete;
  energy = sum(cf,ee);
  tab = tab07;
run;

%construct_pf_vars(data=CB,yr=07);

data concrete_good07;
  set concrete.concrete_0207_gooddata;
  if year ne 2007 then delete;
  tab = tab07;
run;

%construct_pf_vars(data=good,yr=07);


/* ---- Multiply-imputed (CART) samples --------------------------------- */

/* Copied into WORK unchanged.
   NOTE(review): no year filter and no energy or tab derivation is applied,
   so the imputes files are presumably year-specific and already carry
   those variables -- confirm. */
data concrete_CART02;
  set concrete.concrete_imputes02;
run;
%construct_pf_vars(data=CART,yr=02);

data concrete_CART07;
  set concrete.concrete_imputes07;
run;
%construct_pf_vars(data=CART,yr=07);

/* Predicted-value samples, copied into WORK unchanged. */
DATA concrete_pred02;
 SET concrete.concrete_predicted02;
run;
/* This call used to sit inside an asterisk-style comment.  In open code that
   does not disable a macro call: the macro still executes, the comment
   swallows only the first generated statement (the DATA statement), and the
   remaining statements then run outside any step and raise errors.  The
   2002 predicted sample needs the production-function variables for the
   dispersion analysis below, so the call is made for real. */
%construct_pf_vars(data=pred,yr=02);

DATA concrete_pred07;
 SET concrete.concrete_predicted07;
run;
%construct_pf_vars(data=pred,yr=07);



/* Production functions and cost-share residuals.
   estimate_pf_single creates concrete_cs_<year>, which
   estimate_pf_on_CARTdata reads, so the single-sample calls for a year must
   come before the CART/pred calls for that year.  The 2002 CART/pred calls
   are required: they create resids_CART02 and resids_pred02, which the 2002
   dispersion calls below read. */
%estimate_pf_single(data=good,year=02);
%estimate_pf_single(data=CB,year=02);
%estimate_pf_on_CARTdata(year=02,data=CART);
%estimate_pf_on_CARTdata(year=02,data=pred);

%estimate_pf_single(data=good,year=07);
%estimate_pf_single(data=CB,year=07);
%estimate_pf_on_CARTdata(year=07,data=CART);
%estimate_pf_on_CARTdata(year=07,data=pred);



/* Now estimate TFP dispersion using the cost-share residuals. */

%estimate_TFP_on_single(data=good,year=02,var=cs_resid);
%estimate_TFP_on_single(data=CB,year=02,var=cs_resid);
%estimate_TFP_on_CARTdata(year=02,data=CART,var=cs_resid);
%estimate_TFP_on_CARTdata(year=02,data=pred,var=cs_resid);

%estimate_TFP_on_single(data=good,year=07,var=cs_resid);
%estimate_TFP_on_single(data=CB,year=07,var=cs_resid);
%estimate_TFP_on_CARTdata(year=07,data=CART,var=cs_resid);
%estimate_TFP_on_CARTdata(year=07,data=pred,var=cs_resid);

